#replication code for "message or messenger?"####
#arnon, edwards, and li

#R version 4.2.1 (2022-06-23 ucrt)

rm(list = ls())
cat("\014")
set.seed(30030)
#sink(file = "output.txt", append = FALSE) #to create output.txt

library(dplyr) #version 2.2.1
library(estimatr) #version 1.0.0
library(texreg) #version 1.38.6
library(stringr) #version 1.4.1
library(kableExtra) #version 1.3.4
library(xtable) #version 1.8-4
library(ggplot2) #version 3.3.6
library(gridExtra) #version 2.3

#PART 1: people's daily and protest analysis####

#1. load data####

dat <- read.csv("peoplesdaily_protest.csv")
peoples <- read.csv("peoplesdaily_raw.csv")

#2. visualization####

dat$monthyear <- as.Date(dat$monthyear)
peoples$monthyear <- as.Date(peoples$monthyear)

plot <- ggplot(data=dat) +
  aes(x=monthyear) +
  geom_bar(data=subset(peoples,peoples$monthyear < as.Date("2017-06-01")
                       & peoples$monthyear > as.Date("2011-01-01")),aes(y=event, fill = label),
           stat="identity", position = "stack", color="black") +
  geom_smooth(data=dat,aes(y=num_protests/500),size=1.5, method="loess", color="black",
              se=FALSE, span = 0.2) +
  scale_y_continuous(
    name = "People's Daily Protest Pieces",
    sec.axis = sec_axis(~.*500, name="Number of Protests")) +
  xlab("Date") +
  theme_bw() +
  scale_fill_manual(values=c("gray30",
                             "gray80",
                             "white"),
                    labels=c('Accommodative', 
                             'Accusatory',
                             'Neutral')) +
  guides(fill=guide_legend(title="Label")) +
  theme(axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size=14),
        axis.title.x = element_text(size=14),
        axis.title.y = element_text(size=14),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

###FIGURE 1###
plot

#3. analysis####

mod1a <- lm_robust(num_pieces ~ num_protests, data=dat, se_type = "HC0") 
mod1b <- lm_robust(log(num_pieces+1) ~ log(num_protests), data=dat, se_type = "HC0") 
mod1c <- lm_robust(num_pieces ~ scale(num_protests), data=dat, se_type = "HC0")

###TABLE SI.1###
texreg(list(mod1a,mod1b,mod1c), include.ci = FALSE) 

mod2a <- lm_robust(accuse_pieces ~ violent_protests, data=dat, se_type = "HC0")
mod2b <- lm_robust(log(accuse_pieces+1) ~ log(violent_protests), data=dat, se_type = "HC0") 
mod2c <- lm_robust(accuse_pieces ~ scale(violent_protests), data=dat, se_type = "HC0")

###TABLE SI.2###
texreg(list(mod2a,mod2b,mod2c), include.ci = FALSE) 

mod3a <- lm_robust(accom_pieces ~ violent_protests, data=dat, se_type = "HC0") 
mod3b <- lm_robust(log(accom_pieces+1) ~ log(violent_protests), data=dat, se_type = "HC0") 
mod3c <- lm_robust(accom_pieces ~ scale(violent_protests), data=dat, se_type = "HC0") 

###TABLE SI.3###
texreg(list(mod3a,mod3b,mod3c), include.ci = FALSE) 


#PART 2: survey experiment####
rm(list = ls()) #important to clear for new analysis

#1. load data####

dat <- read.csv("survey_data.csv")

#2. survey diagnostics####

#(1)  external validity: survey vs. population####
#chinese population data from: http://www.stats.gov.cn/

ext_validity <- as.data.frame(matrix(NA, 2, 6))
colnames(ext_validity) <- c("Group", "% Male","% Married","% Rural","Average Income", "% Urban SOE Workers")

ext_validity$Group <- c("China (2018)","Sample (2021)")
ext_validity$`% Male` <- c(round((481358/951684)*100,2), round(mean(dat$male)*100,2))
ext_validity$`% Married` <- c(round((705367/951684)*100,2), round(mean(dat$married, na.rm=T)*100,2))
ext_validity$`% Rural` <- c(59.58, 100-round(mean(dat$urban,na.rm=T)*100,2))
ext_validity$`Average Income` <- c(82413,(15000*443 + 40000*507 + 75000*685 + 230000*659 +
                                            420000*76 + 650000*37 + 1400000*12 + 2000000*7)/
                                     (443+507+685+659+76+37+12+7)) #see table(dat$income)
ext_validity$`% Urban SOE Workers` <- c((5740/43419)*100 , (297/(970+297))*100) #see table(dat$urban, dat$public)


###TABLE SI.4###
print(xtable(ext_validity, caption="Comparing Sample and Population",include.rownames=FALSE))


#(2) balance test####

balance <- function(samp.bal,treat){
  treatnum <- nrow(filter(samp.bal, treat == 1))
  controlnum <- nrow(filter(samp.bal, treat == 0))
  
  names <- colnames(samp.bal)
  n <- ncol(samp.bal)
  diff_mean <- array(NA,c(1,n))
  se <- array(NA,c(1,n))
  t <- array(NA,c(1,n))
  p <- array(NA,c(1,n))
  
  for (i in 1:n){
    diff_mean[i] <- mean(filter(samp.bal, treat == 1)[,i],na.rm=T) - 
      mean(filter(samp.bal, treat == 0)[,i],na.rm=T)
    se[i] <- sqrt(var(filter(samp.bal, treat == 1)[,i],na.rm=T)/treatnum +
                    var(filter(samp.bal, treat == 0)[,i],na.rm=T)/controlnum)
    t[i] <- diff_mean[i]/se[i]
    p[i] <- 2*pt(-abs(t[i]), df=pmin(treatnum,controlnum)-1)
  }
  t.bal <- t(rbind(names,diff_mean,t,se,p))
  colnames(t.bal) <- c("variable","diff_mean","t","se","p")
  t.bal[,-1] <- round(as.numeric(t.bal[,-1]),4)
  t.bal
}

dat.balance <- dat[,c(1:17,39:41,43)]

bal_source <- as.data.frame(balance(samp.bal = dat.balance, treat = dat.balance$official_source))[-18:-20,]
bal_accuse <- as.data.frame(balance(samp.bal = dat.balance, treat = dat.balance$accusatory_label))[-18:-20,]
bal_accom <- as.data.frame(balance(samp.bal = dat.balance, treat = dat.balance$accommodative_label))[-18:-20,]

rownames(bal_source) <- NULL
rownames(bal_accuse) <- NULL
rownames(bal_accom) <- NULL

###TABLE SI.5###
kable(bal_source, "latex", booktabs = T, linesep = "")

###TABLE SI.6###
kable(bal_accuse, "latex", booktabs = T, linesep = "")

###TABLE SI.7###
kable(bal_accom, "latex", booktabs = T, linesep = "")


#(3) compliance and attention tests, attrition rate####

compliance_rate_1 <- sum(dat$city_check_all, na.rm=T)/nrow(subset(dat,!is.na(dat$city_check_all)))
compliance_rate_2 <- sum(dat$source_check_final, na.rm=T)/nrow(subset(dat,!is.na(dat$source_check_final))) 

attention_rate <- nrow(subset(dat,nchar(dat$attention) == 3))/nrow(subset(dat,!is.na(dat$attention)))

attrition_rate_1 <- nrow(subset(dat,is.na(dat$source_check_all)))/nrow(dat)
attrition_rate_2 <- nrow(subset(dat,is.na(dat$cancel_project)))/nrow(dat)

#3. main analysis####

#(1) hypothesis 1####

#accusatory vs. neutral
h1.mod1a <- lm_robust(support_arrest ~ accusatory_label, 
                      data = subset(dat, dat$accusatory_label == 1 | (dat$accusatory_label == 0 & dat$accommodative_label == 0)), se_type = "HC1")
h1.mod1b <- lm_robust(support_arrest ~ accusatory_label + male + married + party + income + pollution_care, 
                      data = subset(dat, dat$accusatory_label == 1 | (dat$accusatory_label == 0 & dat$accommodative_label == 0)), se_type = "HC1")

#accommodative vs. neutral
h1.mod2a <- lm_robust(support_arrest ~ accommodative_label, 
                      data = subset(dat, dat$accommodative_label == 1 | (dat$accusatory_label == 0 & dat$accommodative_label == 0)), se_type = "HC1")
h1.mod2b <- lm_robust(support_arrest ~ accommodative_label + male + married + party + income + pollution_care, 
                      data = subset(dat, dat$accommodative_label == 1 | (dat$accusatory_label == 0 & dat$accommodative_label == 0)), se_type = "HC1")

###TABLE 2###
texreg(list(h1.mod1a,h1.mod1b,h1.mod2a,h1.mod2b), include.ci = FALSE)


#(2) hypothesis 2####

#accusatory vs. neutral
h2a.mod1a <- lm_robust(support_arrest ~ accusatory_label*official_source, 
                      data = subset(dat, dat$accusatory_label == 1 | (dat$accusatory_label == 0 & dat$accommodative_label == 0)), se_type = "HC1")
h2a.mod1b <- lm_robust(support_arrest ~ accusatory_label*official_source + male + married + party + income + pollution_care, 
                      data = subset(dat, dat$accusatory_label == 1 | (dat$accusatory_label == 0 & dat$accommodative_label == 0)), se_type = "HC1")

#accommodative vs. neutral
h2a.mod2a <- lm_robust(support_arrest ~ accommodative_label*official_source, 
                      data = subset(dat, dat$accommodative_label == 1 | (dat$accusatory_label == 0 & dat$accommodative_label == 0)), se_type = "HC1")
h2a.mod2b <- lm_robust(support_arrest ~ accommodative_label*official_source + male + married + party + income + pollution_care, 
                      data = subset(dat, dat$accommodative_label == 1 | (dat$accusatory_label == 0 & dat$accommodative_label == 0)), se_type = "HC1")

###TABLE 3###
texreg(list(h2a.mod1a,h2a.mod1b,h2a.mod2a,h2a.mod2b), include.ci = FALSE)


#(3) the willingness to support for similar protest####

m.no_support <- lm_robust(no_support ~ accusatory_label  + male + married + party + income + pollution_care, 
                    data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
m.no_support.source <- lm_robust(no_support ~ accusatory_label*official_source  + male + married + party + income + pollution_care, 
                           data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

m.media_support <- lm_robust(media_support ~ accusatory_label  + male + married + party + income + pollution_care, 
                    data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
m.media_support.source <- lm_robust(media_support ~ accusatory_label*official_source  + male + married + party + income + pollution_care, 
                           data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

m.sign_support <- lm_robust(sign_support ~ accusatory_label  + male + married + party + income + pollution_care, 
                    data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
m.sign_support.source <- lm_robust(sign_support ~ accusatory_label*official_source  + male + married + party + income + pollution_care, 
                           data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

###TABLE 4###
texreg(list(m.no_support,m.no_support.source,m.media_support,m.media_support.source,
            m.sign_support,m.sign_support.source), include.ci = FALSE,
       custom.header = list("no support" = 1:2, "media post" = 3:4, "sign letters" = 5:6),
       float.pos = "!htbp", 
       caption = "",
       label = "table.support.protest.format")

dp <- dplyr::select(dat, no_support, media_support, sign_support, material_support, join_support, lead_support, group)
dp$group <- recode(dp$group, "t1a" = "control", "t2a" = "accusatory", "t3a" = "accommodative",
                   "t1b" = "control", "t2b" = "accusatory", "t3b" = "accommodative")
dp <- na.omit(dp)

dp1 <- dp %>%
  group_by(group, no_support) %>% 
  summarize (n = n()) %>%
  mutate(sum = sum(n), mean = n / sum(n), format = "no support")
dp1 <- filter(dp1, no_support == 1)

dp2 <- dp %>%
  group_by(group, media_support) %>% 
  summarize (n = n()) %>%
  mutate(sum = sum(n), mean = n / sum(n), format = "media post")
dp2 <- filter(dp2, media_support == 1)

dp3 <- dp %>%
  group_by(group, sign_support) %>% 
  summarize (n = n()) %>%
  mutate(sum = sum(n), mean = n / sum(n), format = "sign letter")
dp3 <- filter(dp3, sign_support == 1)

dp4 <- dp %>%
  group_by(group, material_support) %>% 
  summarize (n = n()) %>%
  mutate(sum = sum(n), mean = n / sum(n), format = "material support")
dp4 <- filter(dp4, material_support == 1)

dp5 <- dp %>%
  group_by(group, join_support) %>% 
  summarize (n = n()) %>%
  mutate(sum = sum(n), mean = n / sum(n), format = "join protest")
dp5 <- filter(dp5, join_support == 1)

dp6 <- dp %>%
  group_by(group, lead_support) %>% 
  summarize (n = n()) %>%
  mutate(sum = sum(n), mean = n / sum(n), format = "lead protest")
dp6 <- filter(dp6, lead_support == 1)

dp.all <- data.frame(rbind(dp1[,-2],dp2[,-2],dp3[,-2],dp4[,-2],dp5[,-2],dp6[,-2])) 
dp.all$format <- factor(dp.all$format, levels=unique(dp.all$format))
dp.all$group <- factor(dp.all$group, levels=c("accusatory","control","accommodative"))

p.support_format1 <- ggplot(dp.all[1:9,],aes(x=format,y=mean,fill=group))+
  geom_bar(stat="identity",position='dodge')+
  scale_fill_grey(start = 0, end = .9) +
  coord_cartesian(ylim = c(0, 0.75)) + 
  labs(y="proportion") + 
  theme_bw()

p.support_format2 <- ggplot(dp.all[10:18,],aes(x=format,y=mean,fill=group))+
  geom_bar(stat="identity",position='dodge')+
  scale_fill_grey(start = 0, end = .9) +
  coord_cartesian(ylim = c(0, 0.75)) + 
  labs(y="proportion") + 
  theme_bw()

###FIGURE SI.2###
grid.arrange(p.support_format1,p.support_format2,ncol=1)

# mechanism tests####

#(1) accusatory label and perceptions of the protesters ####

#perceptions of violence

mech.violence1 <- lm_robust(violence_all ~ accusatory_label, 
                       data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
mech.violence2 <- lm_robust(violence_all ~ accusatory_label + male + married + party + income + pollution_care, 
                       data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

#perceptions of improper action

mech.improper1 <- lm_robust(improper_all ~ accusatory_label, 
                         data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
mech.improper2 <- lm_robust(improper_all ~ accusatory_label + male + married + party + income + pollution_care, 
                         data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

#protesters' actions deserve support

mech.should_support1 <- lm_robust(should_support_all ~ accusatory_label, 
                            data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
mech.should_support2 <- lm_robust(should_support_all ~ accusatory_label + male + married + party + income + pollution_care, 
                             data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

###TABLE 5###
texreg(list(mech.violence1,mech.violence2,mech.improper1,mech.improper2,
            mech.should_support1,mech.should_support2), include.ci = FALSE,
       custom.header = list("violent" = 1:2, "improper" = 3:4, "deserve support" = 5:6),
       float.pos = "!htbp", 
       caption = "Accusatory Label on the Perception of Protesters",
       label = "table.mech.protester")

#(2) accusatory label and support for cancellation of pipeline####

mech.project1 <- lm_robust(cancel_project ~ accusatory_label + male + married + party + income + pollution_care, 
                                  data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
mech.project2 <- lm_robust(cancel_project ~ accusatory_label*official_source + male + married + party + income + pollution_care, 
                           data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

#(3) accusatory label and support for pardoning the decision-making officials ####

mech.official1 <- lm_robust(pardon_officials ~ accusatory_label + male + married + party + income + pollution_care, 
                                  data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
mech.official2 <- lm_robust(pardon_officials ~ accusatory_label*official_source + male + married + party + income + pollution_care, 
                            data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

###TABLE 6###
texreg(list(mech.official1,mech.official2,mech.project1,mech.project2), 
       include.ci = FALSE,
       custom.header = list("pardon officials" = 1:2, "cancel project" = 3:4),
       float.pos = "!htbp", 
       caption = "Accusatory Label on the Perception of Government and Project",
       label = "table.mech.other")

#4. check null source effects ####

#(1) differing perceptions of credibility####

credible.accuse1 <- lm_robust(content_credible ~ accusatory_label, data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
credible.accuse2 <- lm_robust(content_credible ~ accusatory_label + male + married + party + income + pollution_care, data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

credible.accuse.gov1 <- lm_robust(content_credible ~ official_source*accusatory_label, data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
credible.accuse.gov2 <- lm_robust(content_credible ~ official_source*accusatory_label + male + married + party + income + pollution_care, data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

###TABLE SI.8###
texreg(list(credible.accuse1,credible.accuse2,credible.accuse.gov1,credible.accuse.gov2), 
       include.ci = FALSE,
       float.pos = "!htbp", 
       caption = "Credibility of the Accusatory Label",
       label = "table.credible.accuse")

# when accommodating

credible.accom1 <- lm_robust(content_credible ~ accommodative_label, data = subset(dat, dat$accusatory_label == 0), se_type = "HC1")
credible.accom2 <- lm_robust(content_credible ~ accommodative_label + male + married + party + income + pollution_care, data = subset(dat, dat$accusatory_label == 0), se_type = "HC1")

credible.accom.gov1 <- lm_robust(content_credible ~ official_source*accommodative_label, data = subset(dat, dat$accusatory_label == 0), se_type = "HC1")
credible.accom.gov2 <- lm_robust(content_credible ~ official_source*accommodative_label + male + married + party + income + pollution_care, data = subset(dat, dat$accusatory_label == 0), se_type = "HC1")

###TABLE SI.9###
texreg(list(credible.accom1,credible.accom2,credible.accom.gov1,credible.accom.gov2), 
       include.ci = FALSE,
       float.pos = "!htbp", 
       caption = "Credibility of the Accommodative Label",
       label = "table.credible.accommodate")


dp.gov1 <- data.frame(tidy(lm_robust(content_credible ~ accusatory_label, 
                          data = subset(dat, dat$accommodative_label == 0 & dat$official_source == 1),
                          se_type = "HC1"),
                conf.int=T))
dp.sch1 <- data.frame(tidy(lm_robust(content_credible ~ accusatory_label, 
                                data = subset(dat, dat$accommodative_label == 0 & dat$official_source == 0),
                                se_type = "HC1"),
                      conf.int=T))

dp.all1 <- rbind(dp.gov1[2,],dp.sch1[2,])
dp.all1$source <- c("official","scholar")

p.credible1 <- ggplot(data=dp.all1, aes(x=source,y=estimate)) +
  geom_segment(aes(y=conf.low,yend=conf.high,x=source,xend=source), alpha = .75, size = 3, colour = "black")+
  geom_point(shape = 21, colour = "black", fill = "#FFFFFF", size = 3, stroke = 4) +
  geom_hline(yintercept = 0, linetype = 2)+
  coord_cartesian(ylim = c(-0.25,0.25)) +
  labs(x="accusatory label",y="treatment effect") 

dp.gov2 <- data.frame(tidy(lm_robust(content_credible ~ accommodative_label, 
                                    data = subset(dat, dat$accusatory_label == 0 & dat$official_source == 1),
                                    se_type = "HC1"),
                          conf.int=T))
dp.sch2 <- data.frame(tidy(lm_robust(content_credible ~ accommodative_label, 
                                    data = subset(dat, dat$accusatory_label == 0 & dat$official_source == 0),
                                    se_type = "HC1"),
                          conf.int=T))

dp.all2 <- rbind(dp.gov2[2,],dp.sch2[2,])
dp.all2$source <- c("official","scholar")

p.credible2 <- ggplot(data=dp.all2, aes(x=source,y=estimate)) +
  geom_segment(aes(y=conf.low,yend=conf.high,x=source,xend=source), alpha = .75, size = 3, colour = "black")+
  geom_point(shape = 21, colour = "black", fill = "#FFFFFF", size = 3, stroke = 4) +
  geom_hline(yintercept = 0, linetype = 2)+
  coord_cartesian(ylim = c(-0.25,0.25)) +
  labs(x="accommodative label",y="treatment effect") 

###FIGURE 2###
grid.arrange(p.credible1,p.credible2,ncol=2)

#(2) heterogeneous effects by party and prior attitudes####

m.party.hetfx.repress <- lm_robust(support_arrest ~ official_source*party, data = dat, se_type = "HC1")
m.party.hetfx.credible <- lm_robust(content_credible ~ official_source*party, data = dat, se_type = "HC1")

m.env.hetfx.repress <- lm_robust(support_arrest ~ official_source*pollution_serious, data = dat, se_type = "HC1")
m.env.hetfx.credible <- lm_robust(content_credible ~ official_source*pollution_serious, data = dat, se_type = "HC1")

###TABLE SI.10###
texreg(list(m.party.hetfx.repress, m.env.hetfx.repress, 
            m.party.hetfx.credible, m.env.hetfx.credible), include.ci = FALSE)


#(3) differing durations of responses####

#when accusing

duration.accuse1 <- lm_robust(log(duration) ~accusatory_label, data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
duration.accuse2 <- lm_robust(log(duration) ~accusatory_label + male + married + party + income + pollution_care, data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

duration.accuse.gov1 <- lm_robust(log(duration) ~official_source*accusatory_label, data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")
duration.accuse.gov2 <- lm_robust(log(duration) ~official_source*accusatory_label + male + married + party + income + pollution_care, data = subset(dat, dat$accommodative_label == 0), se_type = "HC1")

###TABLE SI.11###
texreg(list(duration.accuse1,duration.accuse2,duration.accuse.gov1,duration.accuse.gov2), 
       include.ci = FALSE,
       float.pos = "!htbp", 
       caption = "Response Duration with the Accusatory Label",
       label = "table.duration.accuse")


# when accommodating

duration.accom1 <- lm_robust(log(duration) ~accommodative_label, data = subset(dat, dat$accusatory_label == 0), se_type = "HC1")
duration.accom2 <- lm_robust(log(duration) ~accommodative_label + male + married + party + income + pollution_care, data = subset(dat, dat$accusatory_label == 0), se_type = "HC1")

duration.accom.gov1 <- lm_robust(log(duration) ~official_source*accommodative_label, data = subset(dat, dat$accusatory_label == 0), se_type = "HC1")
duration.accom.gov2 <- lm_robust(log(duration) ~official_source*accommodative_label + male + married + party + income + pollution_care, data = subset(dat, dat$accusatory_label == 0), se_type = "HC1")

###TABLE SI.12###
texreg(list(duration.accom1,duration.accom2,duration.accom.gov1,duration.accom.gov2), 
       include.ci = FALSE,
       float.pos = "!htbp", 
       caption = "Response Duration with the Accommodative Label",
       label = "table.duration.accommodate")

